*------------------------------------------------------*
* JJ created 21 / 04 / 2017 ---------------------------*
* JJ updated 21 / 04 / 2017 ---------------------------*
*------------------------------------------------------*

* This do-file accompanies Lecture 2 of the Stata course. It illustrates how
* to access stored results after you have run summary or estimation commands.

* Set the working directory; the data file opened below is referenced relative to it.
* NOTE(review): this is an absolute, user-specific Windows path - change it to
* the folder holding your own copy of the data before running the do-file.
cd "C:\Users\j_jer\OneDrive\Documents\Teaching\Stata_Course\Lecture_2\Lecture_2_Data"

*-------------------------------------------------------------*
*1. Illustrate how to access summary statistics --------------*
*-------------------------------------------------------------*
	*1A. Load the data file, discarding any data already in memory
	use "Ed_Career_HKG.dta", clear

	*1B. Detailed summary statistics for the ESCS variable
	summarize ESCS, detail

	*1C. List the results that the previous command left behind in r()
	return list

	*1D. New variable filled with the returned mean of ESCS, then inspect the data
	generate Mean_ESCS = r(mean)
	browse

	*1E. Overwrite an existing variable using a returned result
	*    (adds the r(mean) value from the summary in 1B to every ESCS value)
	replace ESCS = ESCS + r(mean)

*-------------------------------------------------------------*
*2. TASK 1. Standardising the variable PV1MATH ---------------*
*-------------------------------------------------------------*
	*2A. Detailed summary of PV1MATH; r(mean) and r(sd) are used in the next step
	summarize PV1MATH, detail

	*2B. Standardise the variable in place:
	*    subtract the mean, then divide by the standard deviation
	replace PV1MATH = (PV1MATH - r(mean)) / r(sd)

	*2C. Summarise the variable again as a check
	*    (it should now have mean 0 and standard deviation 1)
	summarize PV1MATH, detail
		
*-------------------------------------------------------------*
*3. Creating regression coefficients and exporting to Excel --*
*-------------------------------------------------------------*		
	*3A. Run a regression of the maths score on ESCS
	regress PV1MATH ESCS

	*3B. Create new variables holding the ESCS coefficient and its standard error.
	*    They are stored as doubles: the default float type keeps only about
	*    7 significant digits, so the figures written to Excel would otherwise
	*    be truncated versions of the estimates.
	generate double ESCS_Beta = _b[ESCS]
	generate double ESCS_SE   = _se[ESCS]

	*3C. Export to Excel
	*    Writes the coefficient and standard error to the workbook
	*    ESCS_Regression_Coefficients. Both variables are constant across
	*    observations, so only the first observation is exported (in 1).
	*      sheet(Results)      - name of the sheet to write to
	*      sheetmodify         - allow the sheet to be modified if it already exists
	*      firstrow(variables) - put the variable names in the first row
	export excel ESCS_Beta ESCS_SE ///
		using "ESCS_Regression_Coefficients" in 1, ///
		sheet(Results) sheetmodify firstrow(variables)
		

*-------------------------------------------------------------*
*4. Storing regression results in matrices and scalars -------*
*-------------------------------------------------------------*		
	*4A. Run the regression model:
	*    maths score as the dependent variable, ESCS as a covariate,
	*    controlling for reading scores
	regress PV1MATH ESCS PV1READ

	*4B. Create a matrix of the regression coefficients and print it
	matrix beta = e(b)
	matrix list beta

	*4C. Create the variance-covariance matrix and print it
	matrix var_mat = e(V)
	matrix list var_mat

	*4D. Create a new scalar with the value of the ESCS coefficient.
	*    ESCS is the first covariate, so its coefficient is the element in
	*    the first row and first column of the beta matrix.
	scalar escs_beta = beta[1,1]

	*4E. Display / Create new variable
	*    scalar() forces Stata to read escs_beta as the scalar. A bare name is
	*    looked up among the variables first (abbreviations included), so it
	*    would silently use a matching variable if the dataset contained one.
		*i. Display the scalar
		display scalar(escs_beta)

		*ii. Create a new version of the ESCS_Beta variable
		*    (double so the coefficient is stored at full precision)
		generate double ESCS_Beta2 = scalar(escs_beta)
		
*------------------------------------------------------------------------------*
*5. TASK 2. Using only the information in the variance-covariance matrix,
* please create a new variable which contains the standard error of the
* PV1READ coefficient.
*------------------------------------------------------------------------------*		
	*5A. Create a scalar holding the variance of the PV1READ coefficient.
	*    PV1READ is the second covariate in the model, so its variance is the
	*    element in the second row and second column of var_mat.
	scalar read_var = var_mat[2,2]

	*5B. Create the standard error: the square root of that variance.
	*    Stored as a double so it keeps the full precision of the estimate; a
	*    default float would differ from _se[PV1READ] beyond about 7 digits.
	*    scalar() makes sure read_var is read as the scalar and not as a
	*    (possibly abbreviated) variable name.
	generate double READ_SE = sqrt(scalar(read_var))

	*5C. Check that the two are the same
	*    (display of a variable shows its value in the first observation)
	display _se[PV1READ]
	display READ_SE
	
*------------------------------------------------------------------------------*
*6. Making linear predictions.  
*------------------------------------------------------------------------------*	
	*6A. Estimate the OLS model: maths score as the dependent variable and the
	*    reading score as the only covariate, using only pupils whose ESCS
	*    score is less than -1 (low SES pupils only)
	regress PV1MATH PV1READ if ESCS < -1

	*6B. Create linear predictions for ALL pupils
	*    Note that the N is different to the N in the model
	*    (out of sample predictions are being made)
	predict PREDICTED_MATHS_SCORE
	summarize PREDICTED_MATHS_SCORE, detail

	*6C. Look at a scatterplot of actual against predicted maths scores
	scatter PV1MATH PREDICTED_MATHS_SCORE

	*6D. This shows that you have predictions for everyone!
	*    (Not just where ESCS < -1 as per the regression command!)
	*    The second condition is the exact complement of the first among pupils
	*    with a known ESCS. Stata treats missing values as larger than any
	*    number, so a bare "ESCS > -1" would also count pupils whose ESCS is
	*    missing, and it would leave out pupils with ESCS exactly equal to -1.
	summarize PREDICTED_MATHS_SCORE if ESCS < -1, detail
	summarize PREDICTED_MATHS_SCORE if ESCS >= -1 & !missing(ESCS), detail

	*6E. Create predicted values only for those in the analysis model
	*    (e(sample) marks the observations used in the last estimation)
	predict PREDICTED_MATHS_SCORE_2 if e(sample)

	*6F. Create and look at the residuals
		*i. Create residuals
		predict resid, residuals

		*ii. Are residuals normally distributed?
		*    (kernel density plot with a normal density overlaid)
		kdensity resid, normal
	
*------------------------------------------------------------------------------*
*7. Storing and restoring estimates in memory  --------------------------------*
*------------------------------------------------------------------------------*	
	*7A. First model: estimate it and keep a named copy of the results
		*i. Maths score regressed on ESCS alone
		regress PV1MATH ESCS

		*ii. Save the current estimation results under the name model_1
		estimates store model_1

		*iii. List the e() results of the current estimation
		ereturn list

		*iv. New variable holding the ESCS coefficient
		generate BETA_7 = _b[ESCS]

		*v. Show the value of that variable
		display BETA_7

	*7B. Second model: maths score regressed on ESCS and the reading score
		*i. Estimate the model
		regress PV1MATH ESCS PV1READ

		*ii. List the e() results again
		ereturn list

		* The list now reflects this latest estimation.
		* To get back the results from 7A instead, do the following:

	*7C. Bring the model 1 results back as the current estimates
		*i. Restore the stored estimates
		estimates restore model_1

		*ii. List the e() results once more
		ereturn list

		*iii. Second variable holding the ESCS coefficient
		generate BETA_7_COPY = _b[ESCS]

		*iv. assert stops with an error unless this matches the value from 7A
		assert BETA_7 == BETA_7_COPY

	*7D. Drop all stored estimates from memory
	estimates clear
	
*------------------------------------------------------------------------------*
*8. Creating estimate tables --------------------------------------------------*
*------------------------------------------------------------------------------*	
	*8A. First model: maths score regressed on ESCS, stored as model_1
		*i. Estimate the model
		regress PV1MATH ESCS

		*ii. Keep the results under the name model_1
		estimates store model_1

	*8B. Second model: maths score regressed on ESCS and the reading score,
	*    stored as model_2
		*i. Estimate the model
		regress PV1MATH ESCS PV1READ

		*ii. Keep the results under the name model_2
		estimates store model_2

	*8C. Put both sets of stored estimates side by side in one table
	*      se          - include the standard errors
	*      stats(N r2) - request the statistics N and r2 for each model
	estimates table model_1 model_2, se stats(N r2)

	*8D. List the r() results left behind by the table command
	*    (this gives you the coefficients back in a matrix)
	return list
			
			
